
# =============================================================================
# Fraction of US population within 5km (and 10km) of a PC
# Fraction of Clarity sample living within 5km (and 10km) of a PC
# =============================================================================
# Clarity location (cloc) and income (cif) panels; both parquet files are read
# from the same directory, so look that directory up once.
clarity_dir = cdd['p_d_cl_c']
cloc = pd.read_parquet(clarity_dir + r'\cloc_q.parquet')
cif = pd.read_parquet(clarity_dir + r'\cif_q.parquet')

# Zip-level table providing `year`, `zip` and `population`
# (presumably ACS data, judging by the file name -- confirm).
acs_zip_dir = cdd['p_d_acs_zip']
controls = pd.read_parquet(acs_zip_dir + r'\acs_2014_2022_s3.parquet')

# Zip-by-date table providing `date`, `zip` and `intensity`.
treat_dir = cdd['p_d_geo_tcp']
treat = pd.read_parquet(treat_dir + r'\treat_25k_zip_q.parquet')

# Zip codes within each radius: rows of `treat` dated 2021-06-30 whose
# `intensity` is below the cutoff (presumably a distance in metres, so
# 5000 = 5km and 10000 = 10km -- confirm).
treat_snap = treat[treat.date == dt.datetime(2021, 6, 30)]
z5k = list(treat_snap.loc[treat_snap.intensity < 5000, 'zip'].unique())
z10k = list(treat_snap.loc[treat_snap.intensity < 10000, 'zip'].unique())

# Fraction of US pop w/in 5k and 10k: 2019 population summed over the zips in
# each list, divided by the 2019 population summed over every zip in `controls`.
controls_2019 = controls[controls.year == 2019]
us_pop_2019 = controls_2019['population'].sum()
pop5k = controls_2019.loc[controls_2019.zip.isin(z5k), 'population'].sum() / us_pop_2019
pop10k = controls_2019.loc[controls_2019.zip.isin(z10k), 'population'].sum() / us_pop_2019

# Fraction of Clarity sample w/in 5k and 10k, counted as distinct `id`s among
# the rows of `cloc` dated 2020-12-31.
cloc_2020q4 = cloc[cloc.dateq == dt.datetime(2020, 12, 31)]

# Sample size (distinct ids on that date). Bound to a name so it can be
# inspected and reused as the denominator instead of being recomputed.
cl_n = cloc_2020q4['id'].nunique()

cl5k = cloc_2020q4.loc[cloc_2020q4.zip.isin(z5k), 'id'].nunique() / cl_n
cl10k = cloc_2020q4.loc[cloc_2020q4.zip.isin(z10k), 'id'].nunique() / cl_n

# Income distribution of the Clarity sample on 2020-12-31: `describe()` summary
# statistics plus the percentiles listed below.
income_percentiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9]
cif_2020q4 = cif[cif.dateq == dt.datetime(2020, 12, 31)]
a = cif_2020q4['income'].describe(percentiles=income_percentiles)

